% scribe: Timothy Wu
% lastupdate: Oct. 19, 2005
% lecture: 13
% title: Lindeberg's Theorem and Helly-Bray Selection Principle
% references: Durrett (2nd ed), sections 2.4 and 2.2
% keywords: triangular arrays, triangular array conditions, convergence in distribution, Lindeberg's condition, Lindeberg's theorem, Feller's theorem, uniformly asymptotically negligible, Lyapounov's condition, Lyapounov's theorem, Levy metric, Extended Distribution Function, Tightness
% end

\documentclass[12pt, letterpaper]{article}
\include{macros}

% Extra Macros
\newcommand{\law}{\ensuremath{\mathcal{L}}}
\newcommand{\normal}{\ensuremath{\mathcal{N}}}
\newcommand{\tozero}{\ensuremath{\rightarrow 0}}
\newcommand{\eps}{\ensuremath{\epsilon}}
\newcommand{\ind}[1]{\1 \left(#1\right)}

% Fairly local macros
\newcommand{\threecond}{the Triangular Array Conditions}
\newcommand{\bs}{\ensuremath{\mathbf{s}}}

\begin{document}

\lecture{13}{Lindeberg's Theorem and the Helly-Bray Selection Principle}
{Timothy Wu}{tcb6402@berkeley.edu}

This set of notes is a revision of the work of Lawrence Christopher Evans and David S.\ Rosenberg.

\begin{abstract}
In this lecture we begin with a review of Lindeberg's Theorem and its applications. We then build up the tools used in the Helly-Bray Selection Principle and finish with its proof. The lecture presented here corresponds to sections 2.2 and 2.4 of \cite{durrett}.
\end{abstract}

\section{Triangular Arrays}

Roughly speaking, a sum of many small independent random variables will be nearly normally distributed. To formulate a limit theorem of this kind, we must consider sums of more and more, smaller and smaller random variables. Therefore, throughout this section we shall study the sequence of sums
$$
S_i\ =\ \sum_j X_{ij}
$$
obtained by summing the rows of a \emph{triangular array} of random variables
$$
\begin{array}{l}
X_{11},X_{12},\ldots,X_{1n_1} \\
X_{21},X_{22},\ldots \ldots,X_{2n_2} \\
X_{31},X_{32},\ldots \ldots \ldots,X_{3n_3} \\
\vdots \hspace{.5in} \vdots \hspace{.5in} \vdots \hspace{.5in} \vdots
\end{array}.
$$
It will be assumed throughout that triangular arrays satisfy three \emph{Triangular Array Conditions\footnote{This is not standard terminology, but is used here as a simple referent for these conditions.}}:
\begin{enumerate}
\item \label{indeprows} for each $i$, the $n_i$ random variables $X_{i1},X_{i2},\ldots,X_{in_{i}}$ in the $i$th row are mutually independent;
\item \label{zeromean} $\E(X_{ij})=0$ for all $i,j$; and
\item \label{varsumone} $\sum_{j} \E X_{ij}^2=1$ for all $i$.
\end{enumerate}
Here the row index $i$ should always be taken to range over $1,2,3,\ldots$, while the column index $j$ ranges from $1$ to $n_i$. It is \emph{not} assumed that the random variables in each row are identically distributed, and it is \emph{not} assumed that different rows are independent. (Different rows could even be defined on different probability spaces.) It will usually be the case that $n_1 < n_2 < \cdots$.

We can now state Lindeberg's theorem.
\begin{theorem}[Lindeberg's Theorem]
Suppose the triangular array $\{X_{ij}\}$ satisfies \threecond{} and, in addition, \emph{Lindeberg's condition}:
\begin{equation}
\forall \eps>0,\, \lim_{i\toinf} \sum_{j=1}^{n_i} \E [X_{ij}^2 \ind{|X_{ij}|>\eps}]=0.
\end{equation}
Then, as $i\toinf$, $\law(S_i)\rightarrow \normal(0,1)$.
\end{theorem}

\subsection{Applications}

Let $S_n = X_1+X_2+\cdots+X_n$, where $X_1,X_2,\ldots$ is a sequence of independent, possibly non-identically distributed r.v.s, each with mean $0$. Let $\var X_j = \sigma_j^2$ and $\bs_n^2=\sum_{j=1}^n \sigma_j^2$. We want to know when $\law(S_n/\bs_n)\rightarrow\normal(0,1)$. To this end, we check Lindeberg's condition for the triangular array whose $n$th row is $X_{nj}=X_j/\bs_n,\;j=1,2,\ldots,n$.
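As a quick check (assuming, as is implicit here, that $\bs_n>0$ for every $n$), this array does satisfy \threecond: the entries of the $n$th row are independent because the $X_j$ are, they have mean zero because the $X_j$ do, and
$$
\sum_{j=1}^{n} \E X_{nj}^2 \;=\; \frac{1}{\bs_n^2}\sum_{j=1}^{n} \sigma_j^2 \;=\; 1.
$$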
Then $S_n$ in the Lindeberg CLT is replaced by $S_n/\bs_n$, and the Lindeberg condition becomes
\begin{eqnarray}
\lim_{n \toinf} \sum_{j=1}^n \E \left[ \frac{X_j^2}{\bs_n^2} \ind{\left|\frac{X_j}{\bs_n} \right|>\eps}\right] &=& 0,\text{ for all }\eps>0, \\
\text{i.e.\ } \lim_{n \toinf} \frac{1}{\bs_n^2} \sum_{j=1}^n \E \left[ X_j^2 \ind{|X_j|>\eps \bs_n} \right] &=& 0,\text{ for all }\eps>0.
\end{eqnarray}
Examples where the Lindeberg condition holds:
\begin{enumerate}
\item The i.i.d.\ case, where $\bs_n^2 = n \sigma^2$:
$$
\frac{1}{n\sigma^2} \sum_{j=1}^n \E [X_j^2 \ind{|X_j|>\eps \sigma \sqrt{n}}] = \frac{1}{\sigma^2} \E [X_1^2 \ind{|X_1|>\eps \sigma \sqrt{n}}],
$$
and since $\E X_1^2 <\infty$, we can use the dominated convergence theorem to conclude that the Lindeberg condition holds.
\item Lyapounov's condition
$$
\lim_{n\toinf} \frac{1}{\bs_n^{2+\delta}} \sum_{j=1}^n \E |X_j|^{2+\delta}=0 \text{ for some } \delta>0
$$
implies Lindeberg's condition. The proof of this is given (essentially) in the previous lecture.
\item The uniformly bounded case: suppose $|X_j|\leq M$ for all $j$ and $\bs_n\uparrow \infty$. Fix $\eps >0$. For $n$ so large that $\bs_n \geq M/\eps$, we have
$$
\ind{|X_j|>\eps \bs_n} \leq \ind{|X_j|>M}=0\text{ for all }j.
$$
Hence the Lindeberg condition is satisfied.
\end{enumerate}

\section{Extended Distribution Functions}

Extended distribution functions are an extension of distribution functions to the case where we allow mass to exist at $\pm\infty$. A cumulative distribution function $F$ is required to satisfy $\lim_{x \rightarrow \infty} F(x) = 1$ and $\lim_{x \rightarrow -\infty} F(x) = 0$. For extended distribution functions we relax this condition. This is convenient, as the limit of a sequence of distribution functions is often not a proper cumulative distribution function.
\begin{example}
Let $F_{n}$ be the distribution function of $\delta_{n}$, the point mass at $n$, so that $F_n(x) = \ind{x \geq n}$. Then as $n\rightarrow\infty$, $F_{n}(x)\rightarrow 0$ for every $x$. However, the function $F\equiv 0$ is not a cumulative distribution function, since $\lim_{x \rightarrow \infty} F(x) = 0 \neq 1$.
\end{example}
So to deal with the case of mass at $\pm\infty$, we define the extended distribution function.
\begin{definition}[Extended Distribution Function]
A function $F:\R\rightarrow[0,1]$ which is right continuous and nondecreasing is called an \emph{extended distribution function} (e.d.f.). We define $F(-\infty):=\lim_{x \downarrow -\infty}F(x)$ and $F(\infty):=\lim_{x \uparrow \infty}F(x)$. There is then a bijection between extended distribution functions and probability measures on $[-\infty,\infty]$, given by the relation $\mu[-\infty,x]=F(x)$ for all $x\in\R$, so that $\mu(x,\infty] = 1-F(x)$.
\end{definition}
Note here that if $Y$ has extended distribution function $F$, then $F(-\infty)=P(Y=-\infty)$ but $F(\infty)=1-P(Y=\infty)$. Also note that for any e.d.f.\ $F$, if $F(\infty)=1$ and $F(-\infty)=0$, then $F$ is simply a c.d.f. We see now that although the function $F\equiv0$ that we encountered above is not a c.d.f., it is an e.d.f.\ with $F(\infty) = 0$.
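To make the correspondence between e.d.f.s and measures on $[-\infty,\infty]$ concrete, here is a small illustrative example (the particular numbers are chosen purely for illustration).
\begin{example}
Let $F(x) = \tfrac{1}{4} + \tfrac{1}{2}\ind{x \geq 0}$ for $x\in\R$. Then $F$ is nondecreasing and right continuous, with $F(-\infty)=\tfrac{1}{4}$ and $F(\infty)=\tfrac{3}{4}$. The corresponding probability measure on $[-\infty,\infty]$ puts mass $\tfrac{1}{4}$ at $-\infty$, mass $\tfrac{1}{2}$ at $0$, and mass $1-F(\infty)=\tfrac{1}{4}$ at $+\infty$; that is, if $Y$ has e.d.f.\ $F$, then $P(Y=-\infty)=\tfrac{1}{4}$, $P(Y=0)=\tfrac{1}{2}$, and $P(Y=\infty)=\tfrac{1}{4}$.
\end{example}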
Next, we look at theorems dealing with the limits of sequences of cumulative distribution functions and extended distribution functions.

\section{The Helly-Bray Selection Principle}

\begin{theorem}[Helly-Bray Selection Principle]
Every sequence of extended distribution functions $F_{n}$ has a subsequence $F_{n(k)}$ and an extended distribution function $F$ such that $F_{n(k)}(x)\rightarrow F(x)$ at every continuity point $x$ of $F$.
\end{theorem}
Before we prove the main theorem, we introduce the following lemma:
\begin{lemma}
Let $D\subset\R$ be dense. Let $F_{n}$ be a sequence of e.d.f.s such that $\lim_{n\rightarrow\infty}F_{n}(d)=F_{\infty}(d),\, \forall \, d\in D$. Then $F_{n}\Rightarrow F_{\star}$, where $F_{\star}(x):=\inf_{x<d\in D}F_{\infty}(d)$.
\end{lemma}
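The restriction to continuity points of the limit cannot be dropped; the following simple example (included here only as an illustration) shows what can go wrong at a discontinuity.
\begin{example}
Let $F_n(x) = \ind{x \geq 1/n}$, the distribution function of the point mass at $1/n$. Then $F_n(x) \rightarrow F(x) := \ind{x \geq 0}$ for every $x \neq 0$, but $F_n(0) = 0$ for all $n$ while $F(0) = 1$. Convergence fails exactly at the one discontinuity point of the limit $F$.
\end{example}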